Cadences#

import os
from collections import defaultdict, Counter

from git import Repo
import dimcat as dc
import ms3
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from utils import STD_LAYOUT, CADENCE_COLORS, color_background, value_count_df, get_repo_name, resolve_dir
# Location of the corpus checkout; the CORPUS_PATH environment variable overrides the default.
CORPUS_PATH = os.environ.get('CORPUS_PATH', "~/dcml_corpora")
# Printed before resolution, so this shows the raw, unresolved value.
print(f"CORPUS_PATH: '{CORPUS_PATH}'")
# resolve_dir comes from the local utils module; presumably it expands "~" and
# returns an absolute path -- TODO confirm against utils.
CORPUS_PATH = resolve_dir(CORPUS_PATH)
CORPUS_PATH: '~/dcml_corpora'
# Report the exact code and data versions this notebook was executed with.
repo = Repo(CORPUS_PATH)
# Search upwards from the working directory for the repository containing this notebook.
notebook_repo = Repo('.', search_parent_directories=True)
# Only the first 7 characters of each commit hash are printed.
print(f"Notebook repository '{get_repo_name(notebook_repo)}' @ {notebook_repo.commit().hexsha[:7]}")
# NOTE(review): get_repo_name receives a Repo object above and a path here;
# the helper is expected to accept both -- confirm in utils.
print(f"Data repo '{get_repo_name(CORPUS_PATH)}' @ {repo.commit().hexsha[:7]}")
print(f"dimcat version {dc.__version__}")
print(f"ms3 version {ms3.__version__}")
Notebook repository 'data_reports' @ fc05ef0
Data repo 'dcml_corpora' @ 7b1478f
dimcat version 0.3.0.post1.dev13+ga5d37ea
ms3 version 1.2.5

Data loading#

Detected files#

# Register the files found below CORPUS_PATH without parsing any TSV yet (parse_tsv=False).
dataset = dc.Dataset()
dataset.load(directory=CORPUS_PATH, parse_tsv=False)
# Bare expression: displays the overview of detected files as the cell output.
dataset.data
No files have been parsed for analysis.
[default|all]
All corpora
-----------
View: This view is called 'default'. It
	- excludes fnames that are not contained in the metadata,
	- filters out file extensions requiring conversion (such as .xml), and
	- excludes review files and folders.

                               has   active   scores measures    notes expanded
                          metadata     view detected detected detected detected
corpus
ABC                            yes  default       70       70       70       70
beethoven_piano_sonatas        yes  default       87       87       87       64
chopin_mazurkas                yes  default       55       55       55       55
corelli                        yes  default      149      149      149      149
debussy_suite_bergamasque      yes  default        4        4        4        4
dvorak_silhouettes             yes  default       12       12       12       12
grieg_lyric_pieces             yes  default       66       66       66       66
liszt_pelerinage               yes  default       19       19       19       19
medtner_tales                  yes  default       19       19       19       19
mozart_piano_sonatas           yes  default       54       54       54       54
schumann_kinderszenen          yes  default       13       13       13       13
tchaikovsky_seasons            yes  default       12       12       12       12

3838/11057 files are excluded from this view.

3759 files have been excluded based on their subdir.
79 files have been excluded based on their file name.


There are 1 orphans that could not be attributed to any of the respective corpus's fnames.

Filtering#

# Configure the 'annotated' view so that only the 'expanded' facet is included.
annotated_view = dataset.data.get_view('annotated')
annotated_view.include('facets', 'expanded')
# Per the view description printed below, this excludes pieces that do not have
# at least one file per selected facet.
annotated_view.fnames_with_incomplete_facets = False
dataset.data.set_view(annotated_view)
# Parsing happens only now, after the view has been narrowed down.
dataset.data.parse_tsv(choose='auto')
dataset.get_indices()
# Bare expression: displays the overview of the filtered dataset as the cell output.
dataset.data
[annotated|all|default]
All corpora
-----------
View: This view is called 'annotated'. It
	- excludes fnames that are not contained in the metadata,
	- excludes pieces that do not have at least one file per selected facet,
	- filters out file extensions requiring conversion (such as .xml),
	- excludes review files and folders, and
	- includes only facets containing 'expanded'.

                               has     active expanded
                          metadata       view detected parsed
corpus
ABC                            yes  annotated       70     70
beethoven_piano_sonatas        yes  annotated       64     64
chopin_mazurkas                yes  annotated       55     55
corelli                        yes  annotated      149    149
debussy_suite_bergamasque      yes  annotated        4      4
dvorak_silhouettes             yes  annotated       12     12
grieg_lyric_pieces             yes  annotated       66     66
liszt_pelerinage               yes  annotated       19     19
medtner_tales                  yes  annotated       19     19
mozart_piano_sonatas           yes  annotated       54     54
schumann_kinderszenen          yes  annotated       13     13
tchaikovsky_seasons            yes  annotated       12     12

11/12 facets are excluded from this view.


There are 1 orphans that could not be attributed to any of the respective corpus's fnames.
# Sanity check: number of pieces in the active view vs. number of parsed TSV files.
print(f"N = {dataset.data.count_pieces()} annotated pieces, {dataset.data.count_parsed_tsvs()} parsed dataframes.")
N = 537 annotated pieces, 537 parsed dataframes.

Metadata#

# Metadata of all pieces in the active view, as one dataframe.
all_metadata = dataset.data.metadata()
print(f"Concatenated 'metadata.tsv' files cover {len(all_metadata)} of the {dataset.data.count_pieces()} scores.")
# Preview: the first piece of every corpus (index level 0), limited to the first 20 columns.
all_metadata.reset_index(level=1).groupby(level=0).nth(0).iloc[:,:20]
Concatenated 'metadata.tsv' files cover 537 of the 537 scores.
fname TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb n_onsets n_onset_positions guitar_chord_count form_label_count label_count annotated_key harmony_version annotators reviewers
corpus
ABC n01op18-1_01 1: 3/4 1: -1 313 313 939.0 427.0 427.0 1281.0 NaN 3132.75 4589 1950 0 0 405 F 1.0.0 Markus Neuwirth NaN
beethoven_piano_sonatas 01-1 1: 2/2 1: -4 154 152 608.0 308.0 304.0 1216.0 NaN 1476.00 1679 985 0 0 241 f 2.3.0 Lars & Ya-Chuan (2.2.0), John Heilig (2.3.0) AN
chopin_mazurkas BI105-2op30-2 1: 3/4 1: 2 65 64 193.0 65.0 64.0 193.0 NaN 711.00 810 274 0 0 116 b 2.3.0 Wendelin Bitzan (1.0.0), Adrian Nagel (2.2.0),... JH, AN, DK
corelli op01n01a 1: 4/4 1: -1 14 14 56.0 14.0 14.0 56.0 NaN 224.00 280 110 0 0 64 F 2.3.0 Lars Opfermann, Ya-Chuan Wu (2.1.1), Hanné Bec... HB, JH
debussy_suite_bergamasque l075-01_suite_prelude 1: 4/4 1: -1 89 89 356.0 89.0 89.0 356.0 NaN 1533.67 1721 870 0 0 274 F 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) AB, AN
dvorak_silhouettes op08n01 1: 6/8 1: 4, 7: -5, 49: 4 54 52 156.5 54.0 52.0 156.5 NaN 658.75 957 288 0 0 80 c# 2.3.0 Daniel Grote (2.1.1), Hanné Becker (2.3.0) Johannes Hentschel (2.1.1), AN
grieg_lyric_pieces op12n01 1: 2/4 1: -3 23 23 46.0 23.0 23.0 46.0 NaN 135.50 268 156 0 0 43 Eb 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.30) Adrian Nagel
liszt_pelerinage 160.01_Chapelle_de_Guillaume_Tell 1: 4/4 1: 0 97 97 388.0 97.0 97.0 388.0 NaN 1902.42 2879 1069 0 0 174 C 2.3.0 Adrian Nagel (2.1.1), Amelia Brey (2.3.0) Johannes Hentschel (1-33 & 82-97), AB, AN
medtner_tales op08n01 1: 4/8 1: -3 81 81 162.0 81.0 81.0 162.0 NaN 603.00 1481 528 0 0 213 c 2.3.0 Wendelin Bitzan (2.2.0), John Heilig (2.3.0) Adrian Nagel, DK
mozart_piano_sonatas K279-1 1: 4/4 1: 0 100 100 400.0 200.0 200.0 800.0 NaN 767.00 2031 1441 0 0 251 C NaN Uli Kneisel Johannes Hentschel, Markus Neuwirth
schumann_kinderszenen n01 1: 2/4 1: 1 22 22 44.0 44.0 44.0 88.0 NaN 134.33 241 141 0 0 44 G 2.3.0 Tal Soker (2.1.1), John Heilig (2.3.0) AN, JHei, JH
tchaikovsky_seasons op37a01 1: 3/4 1: 3, 29: 1, 63: 3 103 103 309.0 103.0 103.0 309.0 NaN 1058.17 1537 829 0 0 313 A 2.3.0 Adrian Nagel (2.1.1), John Heilig (2.3.0) Johannes Hentschel, AN

All annotation labels from the selected pieces#

# All labels of the 'expanded' facet for every piece in the active view.
all_labels = dataset.data.get_facet('expanded')

print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview of the first 20 rows; color_background (from utils) styles the "chord" column.
all_labels.iloc[:20].style.apply(color_background, subset="chord")
115351 hand-annotated harmony labels:
      mc mn quarterbeats duration_qb mc_onset mn_onset timesig staff voice label globalkey localkey pedal chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note alt_label volta pedalend placement
corpus fname i                                                                  
ABC n01op18-1_01 0 1 1 0 3.000000 0 0 3/4 4 1 F.I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
1 2 2 3 3.000000 0 0 3/4 4 1 V F I nan V nan V nan nan nan nan nan nan M False False (1, 5, 2) () 1 1 nan nan nan
2 3 3 6 3.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
3 4 4 9 6.000000 0 0 3/4 4 1 IV6 F I nan IV6 nan IV nan 6 nan nan nan nan M False False (3, 0, -1) () -1 3 nan nan nan
4 6 6 15 3.000000 0 0 3/4 4 1 V65 F I nan V65 nan V nan 65 nan nan nan nan Mm7 False False (5, 2, -1, 1) () 1 5 nan nan nan
5 7 7 18 1.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
6 7 7 19 1.000000 1/4 1/4 3/4 4 1 vi F I nan vi nan vi nan nan nan nan nan nan m False False (3, 0, 4) () 3 3 nan nan nan
7 7 7 20 1.000000 1/2 1/2 3/4 4 1 ii6 F I nan ii6 nan ii nan 6 nan nan nan nan m False False (-1, 3, 2) () 2 -1 nan nan nan
8 8 8 21 2.000000 0 0 3/4 4 1 V(64) F I nan V(64) nan V nan nan 64 nan nan nan M False False (1, 0, 4) () 1 1 nan nan nan
9 8 8 23 1.000000 1/2 1/2 3/4 4 1 V\\ F I nan V nan V nan nan nan nan nan \\ M False False (1, 5, 2) () 1 1 nan nan nan
10 9 9 24 3.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
11 10 10 27 3.000000 0 0 3/4 4 1 V F I nan V nan V nan nan nan nan nan nan M False False (1, 5, 2) () 1 1 nan nan nan
12 11 11 30 3.000000 0 0 3/4 4 1 I F I nan I nan I nan nan nan nan nan nan M False False (0, 4, 1) () 0 0 nan nan nan
13 12 12 33 6.000000 0 0 3/4 4 1 IV6 F I nan IV6 nan IV nan 6 nan nan nan nan M False False (3, 0, -1) () -1 3 nan nan nan
14 14 14 39 2.000000 0 0 3/4 4 1 #viio7(6)/vi F I nan #viio7(6)/vi nan #vii o 7 6 vi nan nan o7 False False (8, 5, 4, -1) () 8 8 nan nan nan
15 14 14 41 4.000000 1/2 1/2 3/4 4 1 #viio7/vi F I nan #viio7/vi nan #vii o 7 nan vi nan nan o7 False False (8, 5, 2, -1) () 8 8 nan nan nan
16 16 16 45 2.000000 0 0 3/4 4 1 #viio7(4)/ii F I nan #viio7(4)/ii nan #vii o 7 4 ii nan nan o7 False False (7, -1, 1, -2) () 7 7 nan nan nan
17 16 16 47 4.000000 1/2 1/2 3/4 4 1 #viio7/ii F I nan #viio7/ii nan #vii o 7 nan ii nan nan o7 False False (7, 4, 1, -2) () 7 7 nan nan nan
18 18 18 51 1.500000 0 0 3/4 4 1 ii6(11#7b6) F I nan ii6(11#7b6) nan ii nan 6 11#7b6 nan nan nan m False False (-1, -2, 7) (1,) 2 -1 nan nan nan
19 18 18 105/2 1.500000 3/8 3/8 3/4 4 1 ii6 F I nan ii6 nan ii nan 6 nan nan nan nan m False False (-1, 3, 2) () 2 -1 nan nan nan

Filtering out pieces without cadence annotations#

# Derive a new dataset object from the filter; `dataset` itself is still used
# below for the "before" count.
hascadence = dc.HasCadenceAnnotationsFilter().process_data(dataset)
# indices[()] is the list of piece indices stored under the empty-tuple key
# (presumably the "ungrouped" group -- TODO confirm in dimcat).
print(f"Before: {len(dataset.indices[()])} pieces; after removing those without cadence labels: {len(hascadence.indices[()])}")
Before: 537 pieces; after removing those without cadence labels: 457

Show corpora containing pieces with cadence annotations#

# Group the cadence-annotated pieces by corpus.
grouped_by_corpus = dc.CorpusGrouper().process_data(hascadence)
# Group keys are tuples; their first element is used as the corpus name.
corpora = {group[0]: f"{len(ixs)} pieces" for group, ixs in  grouped_by_corpus.indices.items()}
print(f"{len(corpora)} corpora with {sum(map(len, grouped_by_corpus.indices.values()))} pieces containing cadence annotations:")
# Bare expression: displays the corpus -> piece-count mapping as the cell output.
corpora
11 corpora with 457 pieces containing cadence annotations:
{'beethoven_piano_sonatas': '64 pieces',
 'chopin_mazurkas': '50 pieces',
 'corelli': '148 pieces',
 'debussy_suite_bergamasque': '4 pieces',
 'dvorak_silhouettes': '12 pieces',
 'grieg_lyric_pieces': '65 pieces',
 'liszt_pelerinage': '19 pieces',
 'medtner_tales': '16 pieces',
 'mozart_piano_sonatas': '54 pieces',
 'schumann_kinderszenen': '13 pieces',
 'tchaikovsky_seasons': '12 pieces'}

All annotation labels from the selected pieces#

# Rebinds all_labels: from here on it holds only labels of pieces with cadence annotations.
all_labels = hascadence.get_facet('expanded')

print(f"{len(all_labels.index)} hand-annotated harmony labels:")
# Preview of the first 10 rows, showing columns from position 14 onwards.
all_labels.iloc[:10, 14:].style.apply(color_background, subset="chord")
84541 hand-annotated harmony labels:
      chord special numeral form figbass changes relativeroot cadence phraseend chord_type globalkey_is_minor localkey_is_minor chord_tones added_tones root bass_note volta pedalend placement
corpus fname interval                                      
beethoven_piano_sonatas 01-1 [0.0, 9.0) i nan i nan nan nan nan nan { m True True (0, -3, 1) () 0 0 nan nan
[9.0, 17.0) V65 nan V nan 65 nan nan nan nan Mm7 True True (5, 2, -1, 1) () 1 5 nan nan
[17.0, 21.0) i nan i nan nan nan nan nan nan m True True (0, -3, 1) () 0 0 nan nan
[21.0, 25.0) #viio6 nan #vii o 6 nan nan nan nan o True True (2, -1, 5) () 5 2 nan nan
[25.0, 27.0) i6 nan i nan 6 nan nan nan nan m True True (-3, 1, 0) () 0 -3 nan nan
[27.0, 29.0) iio6 nan ii o 6 nan nan nan nan o True True (-1, -4, 2) () 2 -1 nan nan
[29.0, 30.0) V(4) nan V nan nan 4 nan nan } M True True (1, 0, 2) () 1 1 nan nan
[30.0, 32.0) V nan V nan nan nan nan HC nan M True True (1, 5, 2) () 1 1 nan nan
[32.0, 41.0) v nan v nan nan nan nan nan { m True True (1, -2, 2) () 1 1 nan nan
[41.0, 45.0) IVM2 nan IV M 2 nan nan nan nan MM7 True False (4, -1, 3, 0) () -1 4 nan nan

Metadata#

dataset_metadata = hascadence.data.metadata()
# Keep only the metadata rows of the pieces that passed the cadence filter.
hascadence_metadata = dataset_metadata.loc[hascadence.indices[()]]
# The first index level is renamed in place to 'dataset'; later cells group by that name.
hascadence_metadata.index.rename('dataset', level=0, inplace=True)
hascadence_metadata.head()
TimeSig KeySig last_mc last_mn length_qb last_mc_unfolded last_mn_unfolded length_qb_unfolded volta_mcs all_notes_qb ... imslp.1 key mode typesetter electronic editor electronic encoder text pdf score integrity PDF
dataset fname
beethoven_piano_sonatas 01-1 1: 2/2 1: -4 154 152 608.0 308.0 304.0 1216.0 NaN 1476.00 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
01-2 1: 3/4 1: -1 62 61 183.0 124.0 122.0 366.0 NaN 526.17 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
01-3 1: 3/4 1: -4, 43: -1 77 73 219.0 196.0 186.0 558.0 NaN 565.50 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
01-4 1: 2/2 1: -4 199 196 790.0 392.0 390.0 1560.0 (((57, 58), (59, 60, 61)),) 2326.83 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
02-1 1: 2/4 1: 3, 127: 0, 230: 3 342 336 679.5 672.0 664.0 1336.0 (((115, 116, 117, 118), (119, 120, 121, 122, 1... 1695.75 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 71 columns

# Mean of the 'composed_end' year per dataset, truncated to int and sorted ascending.
mean_composition_years = hascadence_metadata.groupby(level=0).composed_end.mean().astype(int).sort_values()
# Dataset names ordered from earliest to latest mean composition year; reused by later plots.
chronological_order = mean_composition_years.index.to_list()
# One row per dataset: its mean year and the number of pieces it contributes.
bar_data = pd.concat([mean_composition_years.rename('year'),
                      hascadence_metadata.groupby(level='dataset').size().rename('pieces')],
                     axis=1
                    ).reset_index()
fig = px.bar(bar_data, x='year', y='pieces', color='dataset', title='Pieces contained in the dataset')
# Fixed bar width on the year axis; the returned figure is the cell's displayed output.
fig.update_traces(width=5)

Overall#

  • PAC: Perfect Authentic Cadence

  • IAC: Imperfect Authentic Cadence

  • HC: Half Cadence

  • DC: Deceptive Cadence

  • EC: Evaded Cadence

  • PC: Plagal Cadence

# Count the rows that carry a cadence label.
print(f"{all_labels.cadence.notna().sum()} cadence labels.")
# value_count_df (from utils) tabulates absolute counts and their share per cadence type.
value_count_df(all_labels.cadence)
5117 cadence labels.
counts %
cadence
PAC 2508 0.490131
HC 1478 0.288841
IAC 838 0.163768
EC 155 0.030291
DC 77 0.015048
PC 61 0.011921
# Pie chart over the rows that carry a cadence label, one slice per cadence type,
# coloured via the shared CADENCE_COLORS mapping.
px.pie(all_labels[all_labels.cadence.notna()], names="cadence", color="cadence", color_discrete_map=CADENCE_COLORS)

Per dataset#

# Absolute number of labels per cadence type within each corpus ...
cadence_count_per_dataset = all_labels.groupby("corpus").cadence.value_counts()
# ... and the same numbers as fractions of each corpus's total cadence count.
cadence_fraction_per_dataset = cadence_count_per_dataset / cadence_count_per_dataset.groupby(level=0).sum()
# category_orders is keyed by column name. The x-axis column is 'corpus', so the
# chronological ordering has to be registered under that key to take effect.
px.bar(cadence_fraction_per_dataset.rename('count').reset_index(), x='corpus', y='count', color='cadence',
      color_discrete_map=CADENCE_COLORS, category_orders=dict(corpus=chronological_order))
# One pie per corpus, laid out in a grid with four facets per row.
fig = px.pie(cadence_count_per_dataset.rename('count').reset_index(), names='cadence', color='cadence', values='count',
       facet_col='corpus', facet_col_wrap=4, height=2000, color_discrete_map=CADENCE_COLORS)
# Facet titles come as "corpus=<name>"; keep only the part after the "=".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**STD_LAYOUT)

Per phrase#

Number of cadences per phrase#

# Slice the corpus-grouped data into phrases.
segmented = dc.PhraseSlicer().process_data(grouped_by_corpus)
phrases = segmented.get_slice_info()
phrase_segments = segmented.get_facet("expanded")
# One group per phrase: the first three index levels together identify a phrase
# (cf. level=[0,1,2] here and ix[2] being the phrase interval further below).
phrase_gpb = phrase_segments.groupby(level=[0,1,2])
# Distinct local keys touched by each phrase, stored as tuples, and how many there are.
local_keys_per_phrase = phrase_gpb.localkey.unique().map(tuple)
n_local_keys_per_phrase = local_keys_per_phrase.map(len)
phrases_with_keys = pd.concat([n_local_keys_per_phrase.rename('n_local_keys'),
                               local_keys_per_phrase.rename('local_keys'),
                               phrases], axis=1)
# NOTE: nunique() counts distinct cadence *types* per phrase, so a phrase with
# two cadences of the same type counts as 1. Missing values are dropped from the tuples.
phrases_with_cadences = pd.concat([
    phrase_gpb.cadence.nunique().rename('n_cadences'),
    phrase_gpb.cadence.unique().rename('cadences').map(lambda l: tuple(e for e in l if not pd.isnull(e))),
    phrases_with_keys
], axis=1)
value_count_df(phrases_with_cadences.n_cadences, counts="#phrases")
#phrases %
n_cadences
1 4571 0.811756
0 867 0.153969
2 183 0.032499
3 10 0.001776
# Per corpus: how many phrases contain 0, 1, 2, ... distinct cadence types.
n_cad = phrases_with_cadences.groupby(level='corpus').n_cadences.value_counts().rename('counts').reset_index().sort_values('n_cadences')
# Cast to str so that the cadence count is used as a discrete colour category.
n_cad.n_cadences = n_cad.n_cadences.astype(str)
# category_orders is keyed by column name. The x-axis column is 'corpus', so the
# chronological ordering has to be registered under that key to take effect.
fig = px.bar(n_cad, x='corpus', y='counts', color='n_cadences', height=800, barmode='group',
             labels=dict(n_cadences="#cadences in a phrase"),
             category_orders=dict(corpus=chronological_order)
      )
fig.show()

Combinations of cadence types for phrases with more than one cadence#

# Frequency of each ordered tuple of cadence types among phrases with more than one type.
value_count_df(phrases_with_cadences[phrases_with_cadences.n_cadences > 1].cadences)
counts %
cadences
(EC, PAC) 47 0.243523
(HC, PAC) 43 0.222798
(DC, PAC) 31 0.160622
(IAC, PAC) 21 0.108808
(PAC, HC) 9 0.046632
(EC, HC) 8 0.041451
(HC, IAC) 4 0.020725
(HC, DC, PAC) 4 0.020725
(IAC, EC) 3 0.015544
(DC, HC) 3 0.015544
(EC, IAC) 3 0.015544
(HC, EC) 3 0.015544
(IAC, HC) 2 0.010363
(DC, EC, PAC) 2 0.010363
(PC, PAC) 2 0.010363
(IAC, DC, PAC) 1 0.005181
(HC, IAC, PAC) 1 0.005181
(PAC, IAC) 1 0.005181
(HC, PC) 1 0.005181
(EC, DC, PAC) 1 0.005181
(HC, DC) 1 0.005181
(DC, IAC) 1 0.005181
(DC, HC, PAC) 1 0.005181

Positioning of cadences within phrases#

# Build one scatter row per marker: every phrase gets its own y position, an
# "end of phrase" marker at x = phrase length, and one marker per cadence label
# at its offset (in quarterbeats) from the phrase start.
df_rows = []
y_position = 0
# Phrases with at least one cadence, ordered by ascending duration.
for ix in phrases_with_cadences[phrases_with_cadences.n_cadences > 0].sort_values('duration_qb').index:
    df = phrase_segments.loc[ix]
    description = str(ix)  # used as hover text
    if df.cadence.notna().any():
        # The third index element is the phrase's interval.
        interval = ix[2]
        df_rows.append((y_position, interval.length, "end of phrase", description))
        start_pos = interval.left
        cadences = df.loc[df.cadence.notna(), ['quarterbeats', 'cadence']]
        # Make the cadence positions relative to the beginning of the phrase.
        cadences.quarterbeats -= start_pos
        for cadence_x, cadence_type in cadences.itertuples(index=False, name=None):
            df_rows.append((y_position, cadence_x, cadence_type, description))
        y_position += 1
    #else:
    #    df_rows.append((y_position, pd.NA, pd.NA, description))

data = pd.DataFrame(df_rows, columns=["phrase_ix", "x", "marker", "description"])
fig = px.scatter(data[data.x.notna()], x='x', y="phrase_ix", color="marker", hover_name="description", height=3000,
                labels=dict(marker='legend'), color_discrete_map=CADENCE_COLORS)
fig.update_traces(marker_size=5)
# Reverse the y axis so that the first (shortest) phrase is drawn at the top.
fig.update_yaxes(autorange="reversed")
fig.show()

Cadence ultima#

# Fetch the phrase segments again and fill in chord features for rows that carry
# a cadence label but no chord.
phrase_segments = segmented.get_facet("expanded")
cadence_selector = phrase_segments.cadence.notna()
missing_chord_selector = phrase_segments.chord.isna()
cadence_with_missing_chord_selector = cadence_selector & missing_chord_selector
# NOTE(review): `missing` is not used again in the visible code; the same
# selection is recomputed in the next line.
missing = phrase_segments[cadence_with_missing_chord_selector]
# Re-expand just those rows with ms3 (presumably deriving the chord columns from
# the label column -- TODO confirm against the ms3 documentation).
expanded = ms3.expand_dcml.expand_labels(phrase_segments[cadence_with_missing_chord_selector], propagate=False, chord_tones=True, skip_checks=True)
# Write the expanded rows back in place.
phrase_segments.loc[cadence_with_missing_chord_selector] = expanded
# Cadence labels that still have no bass note after the re-expansion.
print(f"Ultima harmony missing for {(phrase_segments.cadence.notna() & phrase_segments.bass_note.isna()).sum()} cadence labels.")
Ultima harmony missing for 37 cadence labels.

Ultimae as Roman numeral#

def highlight(row, color="#ffffb3"):
    """Return per-cell CSS for a four-column row, highlighting frequent entries.

    Intended for ``DataFrame.style.apply(highlight, axis=1)``.

    Args:
        row: Object exposing a numeric ``counts`` attribute (e.g. a dataframe row).
        color: CSS colour used as the background of highlighted rows.

    Returns:
        A list of four entries: ``None`` for every cell if ``row.counts`` is
        below 10, otherwise the same ``background-color`` declaration for every cell.
    """
    if row.counts < 10:
        return [None, None, None, None]
    # f-string is required here; otherwise the literal text "{color}" ends up in the CSS.
    return [f"background-color: {color};"] * 4

# Overall cadence frequencies; their index (most frequent type first) orders the facet rows.
cadence_counts = all_labels.cadence.value_counts()
# Count the Roman numerals of the rows carrying a cadence label, separately per mode and cadence type.
ultima_root = phrase_segments.groupby(['localkey_is_minor', 'cadence']).numeral.value_counts().rename('counts').to_frame().reset_index()
# Replace the boolean by a readable facet title.
ultima_root.localkey_is_minor = ultima_root.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_root.style.apply(highlight, axis=1)
# Grid of pies: one row per cadence type, one column per mode.
fig = px.pie(ultima_root, names='numeral', values='counts',
             facet_row='cadence', facet_col='localkey_is_minor',
             height=1500,
             category_orders={'cadence': cadence_counts.index},
            )
# Facet titles come as "<column>=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()
#phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral == 'V')).any() else None)

Ultimae bass note as scale degree#

# Count the bass notes of the rows carrying a cadence label, separately per mode and cadence type.
ultima_bass = phrase_segments.groupby(['localkey_is_minor','cadence']).bass_note.value_counts().rename('counts').reset_index()
# Convert the bass notes to scale degrees with ms3.fifths2sd, passing each row's
# 'bass_note' as `fifths` and 'localkey_is_minor' as `minor`.
ultima_bass.bass_note = ms3.transform(ultima_bass, ms3.fifths2sd, dict(fifths='bass_note', minor='localkey_is_minor'))
# Replace the boolean by a readable facet title (done after the conversion, which needs the boolean).
ultima_bass.localkey_is_minor = ultima_bass.localkey_is_minor.map({False: 'in major', True: 'in minor'})
#ultima_bass.style.apply(highlight, axis=1)
# Grid of pies: one row per cadence type, one column per mode.
fig = px.pie(ultima_bass, names='bass_note', values='counts',
             facet_row='cadence', facet_col='localkey_is_minor',
             height=1500,
             category_orders={'cadence': cadence_counts.index},
            )
# Facet titles come as "<column>=<value>"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(**STD_LAYOUT)
fig.show()

Chord progressions#

PACs with ultima I/i#

#pac_on_i = phrase_segments.groupby(level=[0,1,2], group_keys=False).apply(lambda df: df if ((df.cadence == 'PAC') & (df.numeral.isin(('I', 'i')))).any() else None)
#pac_on_i.cadence.value_counts()
#pac_on_i.droplevel(-1).index.nunique()
def get_progressions(selected='PAC', last_row=None, feature='chord', dataset=None, as_series=True):
    """Collect the sequences of ``feature`` values leading up to cadences of one type.

    Uses the module-level variable ``phrase_segments``. Rows where ``feature``
    is missing are ignored. Within every phrase (first three index levels), the
    rows are cut into runs that each end on a cadence label; rows following
    the last cadence label of a phrase are discarded.

    Args:
        selected: Cadence type whose progressions are collected.
        last_row: Optional mapping ``column -> allowed value(s)`` that the row
            carrying the cadence label has to satisfy. A single value or a
            tuple of admissible values may be given per column.
        feature: Column whose values make up the progressions.
        dataset: If given, only phrases whose corpus name contains this string
            are considered.
        as_series: Return a ``pd.Series`` of tuples if True, else a plain list.

    Returns:
        One tuple of ``feature`` values per matching cadence.
    """
    # Normalise every constraint to a tuple of admissible values.
    constraints = {} if last_row is None else last_row
    constraints = {k: v if isinstance(v, tuple) else (v,) for k, v in constraints.items()}
    progressions = []

    for (corp, _fname, *_), df in phrase_segments[phrase_segments[feature].notna()].groupby(level=[0,1,2]):
        if dataset is not None and dataset not in corp:
            continue
        if not (df.cadence == selected).fillna(False).any():
            continue
        # remove chords after the last cadence label
        df = df[df.cadence.bfill().notna()]
        # group segments leading up to a cadence label: the counter increases on
        # the row *after* each label, so every run ends with its cadence row
        cadence_groups = df.cadence.notna().shift(fill_value=False).cumsum()
        for _, cadence in df.groupby(cadence_groups):
            last_r = cadence.iloc[-1]
            if last_r.cadence != selected:
                continue
            if any(last_r[feat] not in values for feat, values in constraints.items()):
                continue
            progressions.append(tuple(cadence[feature]))
    if as_series:
        return pd.Series(progressions)
    return progressions
# Chord-label progressions of all PACs whose final row has the numeral I or i.
chord_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'chord')
print(f"Progressions for {len(chord_progressions)} cadences:")
value_count_df(chord_progressions, "chord progressions")
Progressions for 2473 cadences:
counts %
chord progressions
(I, V7, I) 14 0.005661
(V, V7, I, ii6(2), ii6, V7, I) 12 0.004852
(I, V(64), V7, I) 11 0.004448
(i, VM7, i, V7, V7(#2), V7, i) 8 0.003235
(i6, i, #viio6, VI6, V6, vo6, IV6, #viio2, V(6), #viio64, i) 6 0.002426
... ... ...
(I, V, I, V6, vi, V, IV, iii65, vi, ii65, V, I) 1 0.000404
(V, V6/iv, IV(9), ii, V7, i, IV, V(4), V, i) 1 0.000404
(I, V, I, I6, I, I6, I, #viio6, i) 1 0.000404
(I6, viio6, I) 1 0.000404
(V65(b9)/ii, ii, V65(b9), I, ii6, viio7/V, V7(9), V7, I(9), I) 1 0.000404

2005 rows × 2 columns

# Same selection as for the chord progressions, but reduced to the Roman numerals.
numeral_progressions = get_progressions('PAC', dict(numeral=('I', 'i')), 'numeral')
value_count_df(numeral_progressions, "numeral progressions")
counts %
numeral progressions
(I, V, V, I) 23 0.009300
(I, V, I) 16 0.006470
(V, V, I, ii, ii, V, I) 12 0.004852
(I, ii, V, V, I) 11 0.004448
(I, V, I, V, I, V, I, V, I) 9 0.003639
... ... ...
(i, vi, V, ii, V, V, I, IV, I, IV, V, V, I) 1 0.000404
(i, VI, ii, V, i, V, V, i) 1 0.000404
(iii, vi, ii, V, I, IV, V, V, I) 1 0.000404
(VI, iv, v, v, III, iv, iv, ii, V, i, ii, V, V, i) 1 0.000404
(V, ii, V, I, ii, vii, V, V, I, I) 1 0.000404

1858 rows × 2 columns

def remove_immediate_duplicates(l):
    """Collapse runs of equal neighbouring elements into a single element.

    Args:
        l: Any iterable of comparable elements (tuple, list, ...).

    Returns:
        A tuple in which no element equals its immediate predecessor. The first
        element is always kept, even if it is ``None``.
    """
    items = tuple(l)
    # Compare by position instead of prepending a sentinel value, so that no
    # legitimate first element (such as None) can be mistaken for the sentinel.
    return tuple(
        current
        for position, current in enumerate(items)
        if position == 0 or current != items[position - 1]
    )

# Merge immediately repeated numerals, e.g. (I, V, V, I) -> (I, V, I), then tabulate.
numeral_prog_no_dups = numeral_progressions.map(remove_immediate_duplicates)
value_count_df(numeral_prog_no_dups)
counts %
(I, V, I) 65 0.026284
(I, ii, V, I) 30 0.012131
(I, V, I, V, I) 23 0.009300
(i, V, i, V, i) 21 0.008492
(i, V, i) 17 0.006874
... ... ...
(I, V, iv, i, V, i, V, iv, ii, V, i, iv, V, i) 1 0.000404
(I, ii, V, IV, V, I) 1 0.000404
(V, i, ii, V, I, ii, V, IV, V, I) 1 0.000404
(i, V, iv, V, i, #vii, iv, V, i) 1 0.000404
(V, ii, V, I, ii, vii, V, I) 1 0.000404

1619 rows × 2 columns

PACs ending on scale degree 1#

Scale degrees expressed w.r.t. major scale, regardless of actual key.

# Bass-note progressions of all PACs whose final row has bass_note == 0
# (scale degree 1, per the section heading and the output below).
bass_progressions = get_progressions('PAC', dict(bass_note=0), 'bass_note')
# Convert every progression to scale degrees; no `minor` argument is passed, so
# the degrees are expressed relative to the major scale, as stated above.
bass_prog = bass_progressions.map(ms3.fifths2sd)
print(f"Progressions for {len(bass_progressions)} cadences:")
value_count_df(bass_prog, "bass progressions")
Progressions for 2320 cadences:
counts %
bass progressions
(1, 5, 1) 24 0.010345
(1, 5, 5, 1) 16 0.006897
(1, 4, 5, 5, 1) 13 0.005603
(5, 5, 1, 4, 4, 5, 1) 12 0.005172
(1, 2, 3, 4, 5, 5, 1) 10 0.004310
... ... ...
(5, 6, 6, 7, 1, 3, 3, 1, 6, 7, 7, 1, 3, 6, 6, 5, 3, 4, 4, 2, 3, 3, 1, 2, 2, 5, 1) 1 0.000431
(b3, 1, 2, b3, 5, 6, 7, 1, 1, 5, 4, 5, 5, 1) 1 0.000431
(b3, 1, 5, b6, b6, 5, b3, 1, 2, b3, 5, 6, 7, 1, 1, 5, 4, 5, 5, 1) 1 0.000431
(1, 6, 6, 7, 3, 6, 7, 1, 1, 3, 4, 5, 1) 1 0.000431
(#1, 2, 7, 1, 4, #4, 5, 5, 1, 1) 1 0.000431

1736 rows × 2 columns

# Merge immediately repeated scale degrees, e.g. (1, 5, 5, 1) -> (1, 5, 1), then tabulate.
bass_prog_no_dups = bass_prog.map(remove_immediate_duplicates)
value_count_df(bass_prog_no_dups)
counts %
(1, 5, 1) 62 0.026724
(1, 5, 1, 5, 1) 30 0.012931
(1, 4, 5, 1) 26 0.011207
(1, 2, 3, 4, 5, 1) 21 0.009052
(5, 1, 4, 5, 1) 17 0.007328
... ... ...
(1, 5, #5, 6, 3, 4, 5, 6, 4, 5, b3, 4, 5, 1) 1 0.000431
(1, 5, 3, 4, 2, 3, 1, 7, 6, 5, 1) 1 0.000431
(3, 4, #4, 5, 3, 4, 5, 1) 1 0.000431
(1, 7, 1, 5, 1, 2, b3, 7, 1, 4, b7, b3, b6, 2, 5, #4, 5, 1) 1 0.000431
(#1, 2, 7, 1, 4, #4, 5, 1) 1 0.000431

1565 rows × 2 columns

def make_sankey(data, labels, node_pos=None, margin=None, pad=20, color='auto', **kwargs):
    """Create a plotly Sankey figure from an edge table and node labels.

    Args:
        data: Object exposing ``source``, ``target`` and ``value`` attributes
            (e.g. a dataframe with these columns); sources and targets are
            node indices into ``labels``.
        labels: Label of every node, indexed by node number.
        node_pos: Optional mapping ``node index -> (x, y)``. Nodes that are
            missing from the mapping are placed at 0.
        margin: Figure margins; defaults to 10 on every side.
        pad: Padding between nodes.
        color: ``'auto'`` assigns one hue per distinct label; any other value
            is handed to plotly unchanged as the node colour(s).
        **kwargs: Forwarded to ``fig.update_layout``.

    Returns:
        The ``go.Figure`` containing the Sankey diagram.
    """
    if margin is None:
        # Created per call rather than shared as a mutable default argument.
        margin = {'l': 10, 'r': 10, 'b': 10, 't': 10}
    if isinstance(color, str) and color == 'auto':
        # dict.fromkeys de-duplicates in first-seen order, which makes the
        # label -> colour assignment reproducible between runs (iterating a
        # set of strings is not).
        unique_labels = list(dict.fromkeys(labels))
        # Guard against an empty label list, which would divide by zero.
        color_step = 100 / len(unique_labels) if unique_labels else 0
        unique_colors = {label: f'hsv({round(i*color_step)}%,100%,100%)' for i, label in enumerate(unique_labels)}
        color = [unique_colors[label] for label in labels]
    if node_pos is None:
        node_x = node_y = None
    else:
        node_x = [node_pos[i][0] if i in node_pos else 0 for i in range(len(labels))]
        node_y = [node_pos[i][1] if i in node_pos else 0 for i in range(len(labels))]
    fig = go.Figure(go.Sankey(
        arrangement = 'snap',
        node = dict(
          pad = pad,
          #thickness = 20,
          #line = dict(color = "black", width = 0.5),
          label = labels,
          x = node_x,
          y = node_y,
          color = color,
          ),
        link = dict(
          source = data.source,
          target = data.target,
          value = data.value
          ),
        ),
     )

    fig.update_layout(margin=margin, **kwargs)
    return fig

def progressions2graph_data(progressions, cut_at_stage=None):
    """Convert progressions into staged graph nodes and weighted edges.

    Every progression is read backwards, so stage 0 is its final element,
    stage 1 the element before it, and so on. Equal elements at the same stage
    share one node; node numbers are assigned in order of first appearance.

    Args:
        progressions: Iterable of reversible sequences (e.g. tuples) of
            hashable elements.
        cut_at_stage: Highest stage to include. ``None`` means no limit;
            ``0`` keeps only the final element of every progression.

    Returns:
        A tuple ``(stage_nodes, edge_weights)`` where ``stage_nodes`` maps
        ``stage -> {element: node number}`` and ``edge_weights`` counts how
        often each ``(earlier node, later node)`` pair occurs.
    """
    stage_nodes = defaultdict(dict)
    edge_weights = Counter()
    node_counter = 0
    for progression in progressions:
        previous_node = None
        for stage, current in enumerate(reversed(progression)):
            # Explicit None check: a plain truthiness test would treat
            # cut_at_stage=0 as "no limit".
            if cut_at_stage is not None and stage > cut_at_stage:
                break
            nodes = stage_nodes[stage]
            if current not in nodes:
                nodes[current] = node_counter
                node_counter += 1
            current_node = nodes[current]
            if previous_node is not None:
                # Edges point in reading direction: from the earlier element
                # (higher stage) to the later one.
                edge_weights[(current_node, previous_node)] += 1
            previous_node = current_node
    return stage_nodes, edge_weights

def graph_data2sankey(stage_nodes, edge_weights):
    """Build a Sankey figure from staged nodes and weighted edges.

    Args:
        stage_nodes: Mapping ``stage -> {label: node number}``.
        edge_weights: Mapping ``(source node, target node) -> weight``.

    Returns:
        The figure returned by ``make_sankey``.
    """
    edge_rows = [(source, target, weight) for (source, target), weight in edge_weights.items()]
    data = pd.DataFrame(edge_rows, columns = ['source', 'target', 'value'])
    # Invert the per-stage label -> node mappings into a single node -> label lookup.
    label_by_node = {}
    for nodes in stage_nodes.values():
        for label, node in nodes.items():
            label_by_node[node] = label
    # Labels have to be listed in node-number order.
    labels = [label_by_node[node_id] for node_id in range(len(label_by_node))]
    return make_sankey(data, labels)

def plot_progressions(progressions, cut_at_stage=None):
    """Plot progressions as a Sankey diagram.

    Converts ``progressions`` to graph data via ``progressions2graph_data``
    (forwarding ``cut_at_stage``) and hands the resulting nodes and edge
    weights to ``graph_data2sankey``, whose return value is returned.
    """
    graph_data = progressions2graph_data(progressions, cut_at_stage=cut_at_stage)
    return graph_data2sankey(*graph_data)

# Sankey diagram of numeral_prog_no_dups (defined earlier in the notebook); progressions are
# read backwards from their last element and cut after stage 3.
plot_progressions(numeral_prog_no_dups, cut_at_stage=3)
# NOTE(review): get_progressions is defined outside this section. Presumably this selects the
# progressions leading to cadences labelled 'PAC' on numeral 'i' in minor local keys and
# expresses them through the 'root' feature -- confirm against its definition.
chord_progressions_minor = get_progressions('PAC', dict(numeral='i', localkey_is_minor=True), 'root')
# Bare expression: displays the resulting Series as the cell output.
chord_progressions_minor
0      (1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 5, ...
1                                           (6, 1, 1, 0)
2                                        (0, 6, 1, 1, 0)
3      (0, 6, 1, 1, 0, -1, -2, -3, -4, 2, 1, 0, -4, 2...
4                               (0, 5, 0, 5, 0, 1, 1, 0)
                             ...
813                   (1, 0, 1, 0, 0, -1, 6, 1, 1, 1, 0)
814                           (-1, -1, 1, 1, 1, 0, 0, 0)
815                                         (2, 0, 0, 0)
816                                  (-1, 0, 2, 0, 0, 0)
817                                (0, 1, 1, 0, 0, 0, 0)
Length: 818, dtype: object
# NOTE(review): presumably the 'chord' progressions leading to cadences labelled 'PAC' on
# numeral 'I' in major local keys -- confirm against get_progressions (defined elsewhere).
pac_major = get_progressions('PAC', dict(numeral='I', localkey_is_minor=False), 'chord')
# Sankey diagram of these progressions, read backwards and cut after stage 4.
plot_progressions(pac_major, cut_at_stage=4)
# Same query for cadences labelled 'DC', restricted to localkey_is_minor=False only
# (no numeral filter is passed here).
deceptive = get_progressions('DC', dict(localkey_is_minor=False), 'chord')
# Count how often each distinct progression tuple occurs; displayed as the cell output.
deceptive.value_counts()
(I, V43(4), I6, IV, V7, vi)                                                                                                                                                                                                                                                                                       3
(I, ii7, I6, I, IV, I, ii7, I6, I, IV, I, IV, V, #viio7/vi, vi)                                                                                                                                                                                                                                                   2
(I6, ii6(#72), ii6, V(9), V7, vi(6), vi, I6, ii6, V(64), V7, vi)                                                                                                                                                                                                                                                  2
(I, IV, V/vi, vi, V, I, IV, V(64), V7, vi)                                                                                                                                                                                                                                                                        2
(I, ii, V7, I, I6, IV, ii6, V(64), V7, vi)                                                                                                                                                                                                                                                                        2
(V, I, I6, IV, V, vi)                                                                                                                                                                                                                                                                                             2
(I, V, ii7(9), V, V, ii7(9), V, V7, ii7(9), V, ii7, V, ii7(13), ii7, V7(#2), V7(6), vi)                                                                                                                                                                                                                           2
(V43, I(4), V65, I, V2(6), V2, I6, #viio7/ii, iv64/ii, #viio65/ii, i6(6)/ii, #viio43/ii, i6/ii, vi, vii%2, vi, ii%43, I64, I6, V7/V, V7, V6/vi, vi)                                                                                                                                                               2
(I6, ii6, V6/V, iii6, V6/vi, IV6, viio, V2, I6, V(64), #viio7/vi, vi(64), vi)                                                                                                                                                                                                                                     2
(I, V, I, IV6, ii, V7, bVI)                                                                                                                                                                                                                                                                                       1
(V, I, vi, I6, ii6, ii, V, I6, IV, V, viio6/V)                                                                                                                                                                                                                                                                    1
(viio6/V, V, vii%43, I6, ii6, V, viio6/V)                                                                                                                                                                                                                                                                         1
(V7, viio2, V7, viio2, V7, viio2, V7, viio2, V7, viio2, V7, viio2, V, viio2, V, viio2, V7, viio2, V, viio64, I6, I, V65, I, V65, I, V65, I, V65, I, IV6, V, #viio7/vi, vi)                                                                                                                                        1
(V, viio2, V7, viio2, V7, viio2, V7, viio2, V7, viio2, V7, viio2, V, viio2, V, viio2, V7, viio2, V, viio64, I6, I, V65, I, V65, I, V65, I, V65, I, ii6, V, #viio7/vi, vi)                                                                                                                                         1
(I, V43, viio64, I, V43, viio64, I, V2, I, V2, I, V2, I, V2, I, #viio7/vi, vi(64), vi)                                                                                                                                                                                                                            1
(I, I6, V7, I, V43, I, V6, I, V65, V7, I, V6, I, V43, I, V65, V7, I6, viio6, I, V2, I6, IV, IV6, V(64), V7, #viio7/vi, vi)                                                                                                                                                                                        1
(I, vi6, ii(4), V7, vi)                                                                                                                                                                                                                                                                                           1
(I, It6/ii, V/ii, #viio43/ii, ii6, V(64), V7(4), #viio7/vi, vi)                                                                                                                                                                                                                                                   1
(viio, V65, I, V7/IV, ii6, V(64), V7(4), V7, viio6/V)                                                                                                                                                                                                                                                             1
(ii, V/ii, V65/ii, ii, V, V65, I(4), I, ii6(2), ii6, V(13), V(64), V7(4), V, viio6/V)                                                                                                                                                                                                                             1
(ii, V/ii, V65/ii, ii, V, V65, I(4), I, ii6(2), ii6, V(13), V(64), V7(4), V, #viio43/ii, #viio7/v, viio43, #viio65/ii, ii6(11), ii6, V(13), V(64), V7(4), V, viio6/V)                                                                                                                                             1
(vii%7, V65, I, ii6, V(64), #viio7/vi, vi)                                                                                                                                                                                                                                                                        1
(V7/V, V, V7, I, ii6, V(64), V7, V43/V)                                                                                                                                                                                                                                                                           1
(V43/V, V65, I6, ii6, V(64), V7, #viio7/vi, V43/V)                                                                                                                                                                                                                                                                1
(IV64, I, IV64, I, IV6, V6(#4), V6, I, I6(7), I6, ii6, V7, #viio7/vi, vi)                                                                                                                                                                                                                                         1
(vi, #viio43/ii, ii6, V7, V(64), V7, #viio7/vi, vi)                                                                                                                                                                                                                                                               1
(I, V43, I, V43, I, V43, I6, ii6, V(64), V7, vi)                                                                                                                                                                                                                                                                  1
(V7, i, V, i, V7, i, V, I, V7, I, ii, V7(4), V7, V65/vi, vi)                                                                                                                                                                                                                                                      1
(I, V43, I, V43, I, viio6, I6, ii6, V(64), V7, vi)                                                                                                                                                                                                                                                                1
(ii, V(64)/vi, It6/vi, V/vi, V7/V, V, V7/IV, IV, ii, V7(^9), V7, vi)                                                                                                                                                                                                                                              1
(#viio43/ii, ii, IV, V(64), V7, I)                                                                                                                                                                                                                                                                                1
(V(974), V, I, IV, V/vi, vi, I, IV, V(64), V7, vi)                                                                                                                                                                                                                                                                1
(I64, IV6, #viio2/vi, vi, vii%2, I, IV, V(64), V7, vi)                                                                                                                                                                                                                                                            1
(I, viio6, I6, IV, I, viio6, I6, IV, I, viio6, I6, IV, I, viio6, I6, viio/V, V, V(64), V7, V(64), V7, I, I6, ii6, V6/V, V(64), V(4), V7, I, I6, ii6, V6/V, V(64), V(4), V, bVI)                                                                                                                                   1
(V, V7, IV(+2), V7, vi)                                                                                                                                                                                                                                                                                           1
(i6/iv, iv/iv, iio6/iv, V(64)/iv, V7/iv, bII)                                                                                                                                                                                                                                                                     1
(I, V2, I, V2, I, V2, I, V2, I, V2, I6, I, viio6, viio, vi6, vi, V6, V, IV6, IV, iii6, iii, ii6, ii, I6, I, V6, IV6, iii6, ii6, I6, viio6, I, V, ii, vi, IV, I, V(4), V, I, V, ii, vi, I/IV, V2/IV, I/IV, V2/IV, I/IV, V7, V(64), V, V7, I, V6, IV6, iii6, ii6, I6, viio6, I, V6, IV6, iii6, ii6, I6, V7, bVI)    1
(V(64)/vi, viio65/V/vi, V/vi, iii, V43, I, V6, V7, #viio43/ii, V7, bVI)                                                                                                                                                                                                                                           1
(I, ii2, I, I6, ii65, V(64), V, vi)                                                                                                                                                                                                                                                                               1
(I, I, V, I, vi, V/vi, vi, IV, V/IV, IV, ii, V/ii, ii, V, vi7)                                                                                                                                                                                                                                                    1
(I, V2, I6, IV, V6/V, V, V6/vi, i/vi, V(64)/vi, V/vi, VI/vi)                                                                                                                                                                                                                                                      1
(I, V6(#11), V6, V65, I(#9), I, V7/IV, IV(#9), IV, I(#11), I, I6, IV, V(64), V7, #viio7/vi, vi)                                                                                                                                                                                                                   1
(V, Ger6, V, viio7/V, V2, I6, IV, V(64), V7, vi)                                                                                                                                                                                                                                                                  1
(V2/V, V6, V2/IV, IV6, vi, ii, V7(^9), V7, I/bVI)                                                                                                                                                                                                                                                                 1
(I, I(64), I, V7, I(#97), I, I(64), I, V7, I(#97), I, vi, viio6/V, V2/V, V6(4), V6, V, IV6, viio, I, viio6, viio/IV, IV, iv, I64)                                                                                                                                                                                 1
dtype: int64
# Sankey diagram of the 'DC' progressions, read backwards and cut after stage 4.
plot_progressions(deceptive, cut_at_stage=4)
# Same kind of plot for bass_prog_no_dups (defined earlier in the notebook), cut after stage 7.
plot_progressions(bass_prog_no_dups, cut_at_stage=7)
def remove_sd_accidentals(t):
    """Return a tuple holding only the last character of every element of ``t``.

    For scale degrees written as strings this drops whatever precedes the
    final character, e.g. ``('#7', '1')`` becomes ``('7', '1')``.
    """
    return tuple(sd[-1] for sd in t)

# Reduce every scale degree to its last character first, then apply remove_immediate_duplicates
# (defined elsewhere; presumably collapses directly repeated elements), so that degrees which
# differed only by their prefix can be merged as well.
bass_prog_no_acc_no_dup = bass_prog.map(remove_sd_accidentals).map(remove_immediate_duplicates)
# Sankey diagram of the simplified bass progressions, read backwards and cut after stage 7.
plot_progressions(bass_prog_no_acc_no_dup, cut_at_stage=7)

HCs ending on V#

# NOTE(review): presumably the 'bass_note' progressions leading to cadences labelled 'HC' on
# numeral 'V'. Each progression is passed through ms3.fifths2sd (presumably a conversion from
# fifths to scale degrees -- confirm that it accepts whole tuples).
half = get_progressions('HC', dict(numeral='V'), 'bass_note').map(ms3.fifths2sd)
# Report how many progressions were retrieved.
print(f"Progressions for {len(half)} cadences:")
# Sankey diagram after applying remove_immediate_duplicates (defined elsewhere), cut after stage 5.
plot_progressions(half.map(remove_immediate_duplicates), cut_at_stage=5)
Progressions for 1409 cadences: